The goal of this session is to get a clean macula densa dataset to work with.
options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320## [1] 79456894976
SO.markers <- FindAllMarkers(SO1, only.pos = TRUE)
SO.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1)SO.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 5) %>%
ungroup() -> top10
DoHeatmap(SO1, features = top10$gene) + NoLegend()markers.to.plot1 <- c("Lrp2", # PT
"Slc5a12", # PT-S1
"Slc13a3", # PT-S2
"Slc16a9", # PT-S3
"Havcr1", # Injured PT
"Epha7", # dTL
"Cryab", # dTL
"Cdh13", # dTL1
"Slc14a2", # dTL2
"Slc12a1", # TAL
"Umod", # TAL, DCT1
"Egf", # TAL, DCT1,
"Cldn10", # TAL
"Cldn16", # TAL
"Nos1", # MD
"Slc12a3", # DCT
"Pvalb", # DCT1
"Slc8a1", # DCT2, CNT
"Aqp2", # PC
"Slc4a1", # IC-A
"Slc26a4", # IC-B
"Nphs1", # Podo
"Ncam1", # PEC
"Flt1", # Endo
"Emcn", # Glom Endo
"Kdr", # Capillary Endo
"Pdgfrb", # Perivascular
"Pdgfra", # Fib
"Piezo2", # Mesangial
"Acta2", # Mural
"Ptprc", # Immune
"Cd74", # Macrophage
"Skap1", # B/T Cells
"Upk1b", # Uro
"Top2a" # Proliferation
)
DotPlot(SO1,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()SO2 <- subset(SO1, idents = c("4", "5", "6", "7"), invert = TRUE)
SO3 <- subset(SO2, features = grep("^mt-|^Gm|Rik|^Rp", rownames(SO2), invert = TRUE, value = TRUE))
DimPlot(SO2)SO3 <- SCTransform(SO3) %>%
RunPCA() %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 2) %>%
RunUMAP(dims = 1:15)## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14658 by 11529
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 2 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 302 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14658 genes
## Computing corrected count matrix for 14658 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 15.87326 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1
## Positive: Umod, Egf, Tmem52b, Sult1d1, Sostdc1, Fabp3, Prdx5, Foxq1, Klk1, Cox6c
## Wfdc15b, Krt7, Ly6a, Ckb, Slc25a5, Cox5b, Wfdc2, Atp5g1, Cldn19, Ggt1
## Ndufa4, Cox4i1, Atp1a1, Chchd10, Atp5b, Cox8a, Gadd45g, Cox6b1, Cox7a2, Uqcr11
## Negative: Pappa2, Zfand5, Aard, Robo2, Pde10a, Wwc2, Itga4, Nadk2, S100g, Nos1
## Ramp3, Neat1, Sgms2, Ptgs2, Col4a4, Irx1, Col4a3, Tmem158, Mir6236, Ranbp3l
## Itprid2, Bmp3, Cdkn1c, Camk2d, Dctd, Mcub, Sdc4, Etnk1, Peg3, Zbtb20
## PC_ 2
## Positive: Fos, Junb, Jun, Egr1, Btg2, Hspa1b, Hspa1a, Atf3, Zfp36, Ier2
## Fosb, Socs3, Klf6, Klf2, Jund, Dusp1, Dnajb1, Rhob, Gadd45g, Tsc22d1
## Gadd45b, Klf4, H3f3b, Ubc, Cebpd, Actb, H1f2, H2bc4, Ppp1r15a, Hspb1
## Negative: Pappa2, Egf, Slc12a1, CT010467.1, Sfrp1, Mir6236, Etnk1, Umod, Wnk1, Sec14l1
## Robo2, Hsp90b1, Pde10a, Car15, Atp1b1, Col4a4, Aard, Wwc2, Mal, Itprid2
## Mgst1, Aebp1, Col4a3, Bmp3, Trim2, Nos1, Sgms2, Zbtb20, Pth1r, Tfcp2l1
## PC_ 3
## Positive: Fth1, Ubb, Ftl1, Ldhb, Fxyd2, Car15, Prdx1, Mt1, Eif1, Cd63
## Cd9, Mpc2, Mgst1, Clu, Bsg, Tmem213, Aldoa, Mdh1, Ramp3, S100a1
## Selenow, Spp1, Tmem176b, Tspo, H3f3b, Wfdc2, Atpif1, Lgals3, Tmem59, Tmbim6
## Negative: Egf, Mir6236, CT010467.1, Umod, Tmem52b, Neat1, Nme7, Malat1, Slc12a1, Kcnq1ot1
## Lars2, Etnk1, Wnk1, Dst, Sult1d1, Foxq1, Slc5a3, Atrx, Atp1b1, Pnisr
## Fos, Syne2, Gls, Ivns1abp, Hnrnpa2b1, Srrm2, Chka, Col4a4, Paxbp1, Zbtb20
## PC_ 4
## Positive: Mt1, Apoe, Mt2, Aebp1, Sostdc1, Gpx6, Fxyd2, Neat1, Ptger3, Ckb
## Fgf9, Egfl6, Ivns1abp, Defb1, Mfsd4a, Car4, Fkbp11, Chchd10, Plet1, Hspa1b
## Atp5md, Igfbp5, Cox6c, Tmem213, Atp5k, CT010467.1, Chka, Mgst3, Lpl, Atp1a1
## Negative: Pappa2, Aard, Tmem52b, Umod, Sult1d1, Tmem158, Egf, Foxq1, Mcub, Ptgs2
## Dctd, Wwc2, Iyd, Ramp3, S100g, Ptprz1, Cd9, Car15, Hsp90b1, Tmsb4x
## Defb42, Cdkn1c, Wnk1, Pth1r, Tagln2, Lcn2, Il1f6, Clu, Ctsc, Bmp2
## PC_ 5
## Positive: Hspa1b, Hspa1a, Umod, Egf, Jun, Cldn10, Pappa2, Car15, Klk1, Fos
## Sfrp1, Fth1, Ier3, Clcnkb, Atp1a1, Slc12a1, Cited2, Hspa8, Pik3r1, Cdkn1c
## Robo2, Kng2, Aard, Gsta4, Ptger3, Fau, Irx1, Ftl1, Uba52-ps, Tmem37
## Negative: S100g, Actb, Tmem52b, Igfbp7, Cryab, S100a6, Tacstd2, Tmsb10, Tmsb4x, Atf3
## Lcn2, Sult1d1, Foxq1, Crip1, Egr1, Ppia, Atp5md, Lgals1, Mir6236, Gadd45b
## Klf4, Ifitm3, Ccnd1, Ccn1, Hbegf, Serf2, Spp1, Nupr1, Cdkn1a, Krt7
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11529
## Number of edges: 359040
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7101
## Number of communities: 25
## Elapsed time: 1 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 02:13:48 UMAP embedding parameters a = 0.9922 b = 1.112
## 02:13:48 Read 11529 rows and found 15 numeric columns
## 02:13:48 Using Annoy for neighbor search, n_neighbors = 30
## 02:13:48 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 02:13:49 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp1vVbwR/filef66b3347ff1b
## 02:13:49 Searching Annoy index using 1 thread, search_k = 3000
## 02:13:51 Annoy recall = 100%
## 02:13:51 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 02:13:52 Initializing from normalized Laplacian + noise (using RSpectra)
## 02:13:52 Commencing optimization for 200 epochs, with 484894 positive edges
## 02:13:52 Using rng type: pcg
## 02:13:54 Optimization finished
DotPlot(SO3,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()## Warning: Could not find Slc5a12 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Kdr in the default search locations, found in 'RNA'
## assay instead
## Warning: The following requested variables were not found: Slc13a3, Havcr1,
## Pdgfrb, Pdgfra, Piezo2, Upk1b
SO4 <- SCTransform(SO4) %>%
RunPCA() %>%
FindNeighbors(dims = 1:16) %>%
FindClusters(resolution = 0.7) %>%
RunUMAP(dims = 1:16)## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14577 by 11448
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 2 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 292 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14577 genes
## Computing corrected count matrix for 14577 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 15.51806 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1
## Positive: Umod, Egf, Tmem52b, Sult1d1, Sostdc1, Fabp3, Klk1, Prdx5, Cox6c, Foxq1
## Ly6a, Wfdc15b, Wfdc2, Slc25a5, Krt7, Cox5b, Ckb, Atp5g1, Cldn19, Cox4i1
## Ndufa4, Atp1a1, Ggt1, Cox8a, Atp5b, Chchd10, Cox6b1, Cox7a2, Uqcrq, Uqcr11
## Negative: Pappa2, Zfand5, Aard, Robo2, Wwc2, Pde10a, Mir6236, Itga4, Nadk2, Neat1
## Nos1, S100g, Col4a4, Ramp3, Sgms2, Col4a3, Ptgs2, Irx1, Tmem158, Itprid2
## Ranbp3l, Bmp3, Camk2d, Sdc4, Etnk1, Cdkn1c, Zbtb20, Srrm2, Hnrnpa2b1, Peg3
## PC_ 2
## Positive: Fos, Junb, Jun, Hspa1b, Hspa1a, Btg2, Egr1, Zfp36, Atf3, Ier2
## Fosb, Socs3, Klf6, Klf2, Jund, Dnajb1, Dusp1, Tsc22d1, Rhob, Gadd45g
## H3f3b, Gadd45b, Ubc, Cebpd, Mt1, Mt2, Actb, H2bc4, H1f2, Klf4
## Negative: Mir6236, CT010467.1, Egf, Pappa2, Slc12a1, Umod, Etnk1, Wnk1, Nme7, Atp1b1
## Sfrp1, Robo2, Sptbn1, Col4a4, Sec14l1, Hsp90b1, Zbtb20, Dst, Itprid2, Tmem52b
## Lars2, Pde10a, Kcnq1ot1, App, Mal, Atrx, Col4a3, Tfcp2l1, Syne2, Nfe2l1
## PC_ 3
## Positive: Egf, Mir6236, CT010467.1, Umod, Tmem52b, Neat1, Nme7, Fos, Jun, Malat1
## Slc12a1, Junb, Kcnq1ot1, Lars2, Egr1, Sult1d1, Foxq1, Etnk1, Dst, Wnk1
## Slc5a3, Atp1b1, Atf3, Fosb, Zfp36, Btg2, Atrx, Klf6, Pnisr, Ivns1abp
## Negative: Fth1, Ubb, Ftl1, Fxyd2, Ldhb, Car15, Prdx1, Cd63, Cd9, Eif1
## Mpc2, Mgst1, Mt1, Tmem213, Bsg, Clu, Mdh1, Aldoa, Itm2b, Ramp3
## Selenow, Tmem59, S100a1, Spp1, Tmem176b, Tspo, Tmbim6, Tle5, Atpif1, Wfdc2
## PC_ 4
## Positive: Pappa2, Aard, Tmem52b, Umod, Egf, Sult1d1, Tmem158, Foxq1, Mcub, Ptgs2
## Dctd, Wwc2, Iyd, Car15, Ramp3, Ptprz1, Cd9, Hsp90b1, S100g, Wnk1
## Pth1r, Cdkn1c, Defb42, Clu, Ctsc, Tmsb4x, Tagln2, Bmp2, Col4a3, Tsc22d1
## Negative: Mt1, Apoe, Mt2, Aebp1, Sostdc1, Gpx6, Fxyd2, Ptger3, Neat1, Ckb
## Fgf9, Egfl6, Ivns1abp, Mfsd4a, Defb1, Car4, CT010467.1, Plet1, Fkbp11, Atp5md
## Chchd10, Cox6c, Atp5k, Igfbp5, Tmem213, Mgst3, Chka, Atp5mpl, Avpr1a, Lpl
## PC_ 5
## Positive: CT010467.1, Hspa1b, Hspa1a, Klk1, Pappa2, Car15, Cldn10, Mir6236, Fth1, Lars2
## Jun, Hspa8, Wfdc2, Itm2b, Ftl1, Fau, Uba52-ps, Ly6a, Sfrp1, Aard
## Pik3r1, Mal, Eef1a1, Atp1a1, Ptger3, Atp1b1, Tspo, Hspb1, Tmem176a, Gpx4
## Negative: S100g, Actb, Tmem52b, Sdc4, Abhd2, Egr1, Uroc1, Tmsb10, Ppia, Atf3
## Serf2, Slc39a1, Alkbh5, Ndufb1-ps, Cebpd, Igfbp7, Cox6c, Atp5md, Ramp3, Ndufa3
## Wfdc15b, Kdm6b, Atp5k, Atp5e, Rbm47, Ldhb-ps, Ranbp3l, Ppm1h, Atp5mpl, Rcan1
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11448
## Number of edges: 356414
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7941
## Number of communities: 15
## Elapsed time: 0 seconds
## 02:14:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 02:14:23 Read 11448 rows and found 16 numeric columns
## 02:14:23 Using Annoy for neighbor search, n_neighbors = 30
## 02:14:23 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 02:14:23 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp1vVbwR/filef66b5958c9ec
## 02:14:23 Searching Annoy index using 1 thread, search_k = 3000
## 02:14:25 Annoy recall = 100%
## 02:14:25 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 02:14:26 Initializing from normalized Laplacian + noise (using RSpectra)
## 02:14:26 Commencing optimization for 200 epochs, with 483954 positive edges
## 02:14:26 Using rng type: pcg
## 02:14:28 Optimization finished
DotPlot(SO4,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()## Warning: Could not find Slc5a12 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Kdr in the default search locations, found in 'RNA'
## assay instead
## Warning: The following requested variables were not found: Slc13a3, Havcr1,
## Pdgfrb, Pdgfra, Piezo2, Upk1b
SO4@meta.data$seurat_clusters <- factor(
SO4@meta.data$seurat_clusters,
levels = c("0", "1", "3", "4", "10", "2", "11", "5", "13", "6", "9", "14", "7", "8", "12")
)
Idents(SO4) <- SO4$seurat_clusters
## Simplifying into a few types
markers.to.plot2 <- c("Pappa2",
"Nos1",
"Egf",
"Umod",
"Cldn19",
"Foxq1",
"Sult1d1",
"Fos",
"Junb",
"Cxcl10",
"Isg15"
)
DimPlot(SO4)DotPlot(SO4,
features = markers.to.plot2,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
group.by = "seurat_clusters",
col.max = 2.5)+
coord_flip()SO4@meta.data <- SO4@meta.data %>%
mutate(subclass_MD = dplyr::case_when(
seurat_clusters %in% c(0,1,3,4,10,2,11) ~ "type_1",
seurat_clusters %in% c(5,13,6,9,14) ~ "type_2a",
seurat_clusters %in% c(7) ~ "type_2b",
seurat_clusters %in% c(8) ~ "type_3",
seurat_clusters == 12 ~ "type_4",
))
SO4@meta.data$subclass_MD <- factor(
SO4@meta.data$subclass_MD,
levels = c("type_1", "type_2a", "type_2b","type_3","type_4")
)
SO4@meta.data <- SO4@meta.data %>%
mutate(subclass2_MD = dplyr::case_when(
seurat_clusters %in% c(0,1,3,4,10,2,11) ~ "type_1",
seurat_clusters %in% c(5,13,6,9,14,7) ~ "type_2",
seurat_clusters %in% c(8) ~ "type_3",
seurat_clusters %in% c(12) ~ "type_4"
))
SO4@meta.data$subclass2_MD <- factor(
SO4@meta.data$subclass2_MD,
levels = c("type_1", "type_2","type_3","type_4")
)
Idents(SO4) <- SO4@meta.data$subclass_MD
DimPlot(object = SO4, reduction = "umap", group.by = "subclass_MD", label = TRUE)markers.to.plot3 <- c("Pappa2",
"Egf",
"Umod",
"Fos",
"Junb",
"Cxcl10",
"Isg15"
)
DotPlot(SO4,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
group.by = "subclass2_MD",
col.max = 2.5)+
coord_flip()DotPlot(SO4,
features = markers.to.plot2,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
group.by = "subclass_MD",
col.max = 2.5)+
coord_flip()Idents(SO4) <- "seurat_clusters"
SO4_DEGs <- FindAllMarkers(SO4,
only.pos = FALSE,
logfc.threshold = 0.5,
min.pct = 0.2,
min.diff.pct = 0.05,
return.thresh = 0.05)## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 10
## Calculating cluster 2
## Calculating cluster 11
## Calculating cluster 5
## Calculating cluster 13
## Calculating cluster 6
## Calculating cluster 9
## Calculating cluster 14
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 12
SO4_DEGs %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) %>%
slice_head(n = 3) %>%
ungroup() -> top3
DoHeatmap(SO4, features = top3$gene) + NoLegend()